1 Result

2 Setup

pacman::p_load(
  circlize,
  glue,
  here,
  janitor,
  lubridate,
  magrittr,
  skimr,
  tidyverse
)

3 Background

Background information: Tidy Tuesday
Data source: Kaggle

3.1 Tidy Tuesday

3.2 Data Dictionary

4 Raw Data

Get data and write to local file

# get data
olympics <- tidytuesdayR::tt_load(yr, week = wk) %>% 
  pluck("olympics")

regions <- tidytuesdayR::tt_load(yr, week = wk) %>% 
  pluck("regions")
  
# join on NOC region and write to local file 
write_csv(
  left_join(olympics, regions, by = c("noc" = "NOC")), 
  here("data", glue("data_{yr}_{wk}.csv")))

Read data from local file (d_raw) and create working copy (d)

# raw data
d_raw <- read_csv(
  here("data", glue("data_{yr}_{wk}.csv")),
  col_types = cols(.default = "c"),
  na = c("NA", "NULL", "")
)
# working copy
d <- d_raw

5 Inspection

d
glimpse(d)
## Rows: 271,116
## Columns: 17
## $ id     <chr> "1", "2", "3", "4", "5", "5", "5", "5", "5", "5", "6", "6", "6"…
## $ name   <chr> "A Dijiang", "A Lamusi", "Gunnar Nielsen Aaby", "Edgar Lindenau…
## $ sex    <chr> "M", "M", "M", "M", "F", "F", "F", "F", "F", "F", "M", "M", "M"…
## $ age    <chr> "24", "23", "24", "34", "21", "21", "25", "25", "27", "27", "31…
## $ height <chr> "180", "170", NA, NA, "185", "185", "185", "185", "185", "185",…
## $ weight <chr> "80", "60", NA, NA, "82", "82", "82", "82", "82", "82", "75", "…
## $ team   <chr> "China", "China", "Denmark", "Denmark/Sweden", "Netherlands", "…
## $ noc    <chr> "CHN", "CHN", "DEN", "DEN", "NED", "NED", "NED", "NED", "NED", …
## $ games  <chr> "1992 Summer", "2012 Summer", "1920 Summer", "1900 Summer", "19…
## $ year   <chr> "1992", "2012", "1920", "1900", "1988", "1988", "1992", "1992",…
## $ season <chr> "Summer", "Summer", "Summer", "Summer", "Winter", "Winter", "Wi…
## $ city   <chr> "Barcelona", "London", "Antwerpen", "Paris", "Calgary", "Calgar…
## $ sport  <chr> "Basketball", "Judo", "Football", "Tug-Of-War", "Speed Skating"…
## $ event  <chr> "Basketball Men's Basketball", "Judo Men's Extra-Lightweight", …
## $ medal  <chr> NA, NA, NA, "Gold", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ region <chr> "China", "China", "Denmark", "Denmark", "Netherlands", "Netherl…
## $ notes  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…

6 Coercion

d %<>% mutate(across(c("id", "age", "height", "weight", "year"),  ~ as.numeric(.x)))

7 Exploration

7.1 Athletes

skim(d)
Data summary
Name d
Number of rows 271116
Number of columns 17
_______________________
Column type frequency:
character 12
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
name 0 1.00 2 108 0 134731 0
sex 0 1.00 1 1 0 2 0
team 0 1.00 2 47 0 1184 0
noc 0 1.00 3 3 0 230 0
games 0 1.00 11 11 0 51 0
season 0 1.00 6 6 0 2 0
city 0 1.00 4 22 0 42 0
sport 0 1.00 4 25 0 66 0
event 0 1.00 15 85 0 765 0
medal 231333 0.15 4 6 0 3 0
region 370 1.00 2 32 0 205 0
notes 266077 0.02 5 27 0 21 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1.00 68248.9 39022.29 1 34643 68205 102097 135571 ▇▇▇▇▇
age 9474 0.97 25.6 6.39 10 21 24 28 97 ▇▃▁▁▁
height 60171 0.78 175.3 10.52 127 168 175 183 226 ▁▂▇▂▁
weight 62875 0.77 70.7 14.35 25 60 70 79 214 ▃▇▁▁▁
year 0 1.00 1978.4 29.88 1896 1960 1988 2002 2016 ▁▂▃▆▇

7.1.1 Number of Athletes

athlete_game <- d %>% 
  select(season, year, id) %>% 
  distinct() %>% 
  count(season, year)

ggplot(athlete_game, aes(x = year, y = n, color = season)) +
  geom_line() +
  labs(title = "Number of athletes at the Olympic games") +
  scale_y_continuous(name = "Athletes") +
  scale_x_continuous(name = "Year")

7.1.2 Sex

ggplot(count(d, year, sex), aes(x = as.factor(year), y = n, fill = sex)) +
  ggchicklet::geom_chicklet(position = position_fill(), radius = grid::unit(4, "pt")) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(name = "Year") +
  labs(title = "Sex of Olympic athletes") +
  theme(
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 8, angle = 90),
    panel.grid.major = element_blank()
  )

7.1.3 Age

Youngest

youngest <- filter(d, age == min(age, na.rm = TRUE))

The youngest athlete —Dimitrios Loundras— was only 10 years old when he participated in the 1896 Summer Olympics (in Gymnastics).

Oldest

oldest <- filter(d, age == max(age, na.rm = TRUE))

The oldest athlete —John Quincy Adams Ward— was 97 years old when he participated in the 1928 Summer Olympics (in Art Competitions).

Age by sport

ggplot(
  filter(d, !is.na(age)), 
  aes(
    x = fct_rev(as.factor(sport)),
    y = age,
    fill = sport
  )) +
  geom_violin(color = NA) +
  scale_fill_manual(values = pal_length(
    pal_spectrum,
    length(unique(d$sport))
  )) +
  coord_flip() +
  theme(legend.position = "none", axis.title.y = element_blank())

7.1.4 Weight

Lightest

lightest <- d %>% 
  group_by(sex) %>% 
  filter(weight == min(weight, na.rm = TRUE)) %>% 
  select(name, sex, height, weight, team, sport) %>% 
  distinct()

The lightest male was Albert Ferdinand “Al” Zerhusen with 28 kg.
The lightest female was Choi Myong-Hui with 25 kg.

lightest

Note: the lightest male may be a data error, considering his height, and unlikely BMI of 8.361.

Heaviest

heaviest <- d %>% 
  group_by(sex) %>% 
  filter(weight == max(weight, na.rm = TRUE)) %>% 
  select(name, sex, height, weight, team, sport) %>% 
  distinct()

The heaviest male was Ricardo Blas, Jr. with 214 kg.
The heaviest female was Olha Vasylivna Korobka with 167 kg.

heaviest

Weight by sport and sex

ggplot(
  filter(d, !is.na(weight)), 
  aes(
    x = weight,
    y = fct_rev(as.factor(sport)),
    fill = sport
  )) +
  geom_boxplot(
    width = 0.7, 
    size = 0.2, 
    outlier.size = 0.02, 
    outlier.alpha = 0.5) +
  scale_fill_manual(values = pal_length(
    pal_spectrum,
    length(unique(d$sport))
  )) +
  scale_x_continuous(limits = c(30, 150)) +
  facet_wrap(~sex) +
  theme(
    legend.position = "none", 
    axis.title.y = element_blank())

7.1.5 Height

Shortest

shortest <- d %>% 
  group_by(sex) %>% 
  filter(height == min(height, na.rm = TRUE)) %>% 
  select(name, sex, height, weight, team, sport) %>% 
  distinct()

The shortest male was Lyton Levison Mphande with 127 cm.
The shortest female was Rosario Briones with 127 cm.

shortest

Tallest

tallest <- d %>% 
  group_by(sex) %>% 
  filter(height == max(height, na.rm = TRUE)) %>% 
  select(name, sex, height, weight, team, sport) %>% 
  distinct()

The tallest male was Yao Ming with 226 cm.
The tallest female was Magorzata Teresa “Margo” Dydek (-Twigg) with 213 cm.

tallest

Height by sport and sex

ggplot(
  filter(d, !is.na(height)), 
  aes(
    x = height,
    y = fct_rev(as.factor(sport)),
    fill = sport
  )) +
  geom_boxplot(
    width = 0.7, 
    size = 0.2, 
    outlier.size = 0.02, 
    outlier.alpha = 0.5) +
  scale_fill_manual(values = pal_length(
    pal_spectrum,
    length(unique(d$sport))
  )) +
  facet_wrap(~sex) +
  theme(
    legend.position = "none", 
    axis.title.y = element_blank())

7.1.6 Participation

Number of games in which the athlete has participated.

d %>% 
  select(id, name, games) %>% 
  distinct() %>% 
  count(id, name) %>% 
  select(-id) %>% 
  arrange(desc(n))

Number of games in which the athlete won a medal

d %>% 
  filter(!is.na(medal)) %>% 
  select(id, name, games) %>% 
  distinct() %>% 
  count(id, name) %>% 
  select(-id) %>% 
  arrange(desc(n))

7.2 Medalists

medalists <- d %>% filter(!is.na(medal))
skim(medalists)
Data summary
Name medalists
Number of rows 39783
Number of columns 17
_______________________
Column type frequency:
character 12
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
name 0 1.00 4 93 0 28202 0
sex 0 1.00 1 1 0 2 0
team 0 1.00 2 47 0 498 0
noc 0 1.00 3 3 0 149 0
games 0 1.00 11 11 0 51 0
season 0 1.00 6 6 0 2 0
city 0 1.00 4 22 0 42 0
sport 0 1.00 4 25 0 66 0
event 0 1.00 15 85 0 756 0
medal 0 1.00 4 6 0 3 0
region 9 1.00 2 27 0 136 0
notes 39238 0.01 7 27 0 11 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1.00 69407.1 38849.98 4 36494 68990 103462 135563 ▇▇▇▇▇
age 732 0.98 25.9 5.91 10 22 25 29 73 ▃▇▁▁▁
height 8711 0.78 177.6 10.89 136 170 178 185 223 ▁▃▇▂▁
weight 9327 0.77 73.8 15.02 28 63 73 83 182 ▂▇▂▁▁
year 0 1.00 1973.9 33.82 1896 1952 1984 2002 2016 ▂▂▃▆▇

Medals by athlete

medals_athlete <- d %>%
  group_by(id, name) %>%
  count(medal) %>%
  mutate(medal = str_to_lower(medal)) %>%
  pivot_wider(names_from = medal, values_from = n) %>%
  select(-`NA`) %>%
  mutate(across(c("gold", "silver", "bronze"), ~ replace_na(.x, 0))) %>%
  mutate(total = gold + silver + bronze) %>%
  ungroup() %>%
  arrange(desc(total))

sports_athlete <- d %>% 
  select(id, sport) %>% 
  distinct() %>% 
  group_by(id) %>% 
  mutate(nr = row_number()) %>% 
  pivot_wider(names_from = nr, values_from = sport) %>% 
  unite(sports, 2:ncol(.), sep = ", ", na.rm = TRUE) %>% 
  ungroup()

left_join(medals_athlete, sports_athlete, by = "id") %>% 
  select(name, sports, total, gold, silver, bronze)

Male athletes with most olympic medals

medalists %>% 
  filter(sex == "M") %>% 
  count(id, name) %>% 
  slice_max(order_by = n, n = 10) %>% 
  rename(medals = n) %>% 
  rowid_to_column("rank") %>% 
  left_join(sports_athlete) %>% 
  select(-id)

Female athletes with most olympic medals

medalists %>% 
  filter(sex == "F") %>% 
  count(id, name) %>% 
  slice_max(order_by = n, n = 10) %>% 
  rename(medals = n) %>% 
  rowid_to_column("rank") %>% 
  left_join(sports_athlete) %>% 
  select(-id)

7.3 Multipotentialites

With the term “multipotentialite” I refer to athletes with olympic medals in multiple sports.

Filter athletes with medals in more than 1 sport

multipotentialite_id <- medalists %>% 
  group_by(id) %>% 
  count(sport) %>% 
  count(id) %>% 
  filter(n > 1) %>% 
  pull(id)

# number of multipotentialites
n_unique(multipotentialite_id)
## [1] 86
# select multipotentialites among medalists
multipotentialite <- filter(medalists, id %in% multipotentialite_id)
multipotentialite

7.4 Country Stats

7.4.1 Total Medals

Important note: this is just an approximation, because data are at athlete-level and not country-level. Thus, team sports are overrepresented, as a single winning event is assigned to multiple athletes.

Total number of medals per country from 1896 to 2016

medal_count <- d %>% 
  group_by(region) %>% 
  count(medal) %>% 
  mutate(medal = str_to_lower(medal)) %>% 
  pivot_wider(names_from = medal, values_from = n) %>% 
  select(-`NA`) %>% 
  mutate(across(c("gold", "silver", "bronze"), ~replace_na(.x, 0))) %>% 
  mutate(total = gold + silver + bronze) %>% 
  select(region, gold, silver, bronze, total) %>% 
  ungroup()

medal_count

7.4.2 Most Medals

Top 25 countries with most olympic medals
(not very informative —not taking number of athletes and times participated into account)

medal_count %>% 
  slice_max(order_by = total, n = 25) %>% 
  rowid_to_column("rank")

7.4.3 Times Participated

Number of games in which countries have participated

times_participated <- d %>% 
  select(region, games) %>% 
  distinct() %>% 
  group_by(region) %>% 
  count() %>% 
  arrange(desc(n)) %>% 
  rename(events = n)

times_participated

7.4.4 Win Percentage

Number of medalists (any color) divided by number of participants per country per year

win_ratio <- left_join(
    count(d, region, year),
    count(medalists, region, year), 
    by = c("region", "year")) %>% 
  arrange(desc(year)) %>% 
  rename(participants = n.x, medalists = n.y) %>% 
  mutate(medalists = replace_na(medalists, 0)) %>%
  mutate(ratio = formattable::percent(medalists / participants))

win_ratio %>% 
  select(-medalists, -participants) %>% 
  pivot_wider(names_from = year, values_from = ratio)

7.4.5 Highest Win Percentage

Country with highest medalists/participants ratio per year

win_ratio %>% 
  group_by(year) %>% 
  filter(ratio == max(ratio)) %>% 
  select(year, region, ratio, medalists, participants)

7.4.6 No Medals

69 participating countries are still awaiting their first medal.

medal_count %>% filter(total == 0) %>% pull(region)
##  [1] "Albania"                          "American Samoa"                  
##  [3] "Andorra"                          "Angola"                          
##  [5] "Antigua"                          "Aruba"                           
##  [7] "Bangladesh"                       "Belize"                          
##  [9] "Benin"                            "Bhutan"                          
## [11] "Boliva"                           "Bosnia and Herzegovina"          
## [13] "Brunei"                           "Burkina Faso"                    
## [15] "Cambodia"                         "Cape Verde"                      
## [17] "Cayman Islands"                   "Central African Republic"        
## [19] "Chad"                             "Comoros"                         
## [21] "Cook Islands"                     "Democratic Republic of the Congo"
## [23] "Dominica"                         "El Salvador"                     
## [25] "Equatorial Guinea"                "Gambia"                          
## [27] "Guam"                             "Guinea"                          
## [29] "Guinea-Bissau"                    "Honduras"                        
## [31] "Kiribati"                         "Laos"                            
## [33] "Lesotho"                          "Liberia"                         
## [35] "Libya"                            "Madagascar"                      
## [37] "Malawi"                           "Maldives"                        
## [39] "Mali"                             "Malta"                           
## [41] "Marshall Islands"                 "Mauritania"                      
## [43] "Micronesia"                       "Myanmar"                         
## [45] "Nauru"                            "Nicaragua"                       
## [47] "Oman"                             "Palau"                           
## [49] "Palestine"                        "Papua New Guinea"                
## [51] "Republic of Congo"                "Rwanda"                          
## [53] "Saint Kitts"                      "Saint Lucia"                     
## [55] "Saint Vincent"                    "Samoa"                           
## [57] "San Marino"                       "Sao Tome and Principe"           
## [59] "Seychelles"                       "Sierra Leone"                    
## [61] "Solomon Islands"                  "Somalia"                         
## [63] "South Sudan"                      "Swaziland"                       
## [65] "Timor-Leste"                      "Turkmenistan"                    
## [67] "Vanuatu"                          "Virgin Islands, British"         
## [69] "Yemen"

7.5 Sports Stats

7.5.1 Unique Sports

Winter Olympics

sports_winter <- d %>% 
  filter(season == "Winter") %>% 
  pull(sport) %>% 
  unique() %>% 
  sort()

sports_winter
##  [1] "Alpine Skiing"             "Alpinism"                 
##  [3] "Biathlon"                  "Bobsleigh"                
##  [5] "Cross Country Skiing"      "Curling"                  
##  [7] "Figure Skating"            "Freestyle Skiing"         
##  [9] "Ice Hockey"                "Luge"                     
## [11] "Military Ski Patrol"       "Nordic Combined"          
## [13] "Short Track Speed Skating" "Skeleton"                 
## [15] "Ski Jumping"               "Snowboarding"             
## [17] "Speed Skating"

Summer Olympics

sports_summer <- d %>% 
  filter(season == "Summer") %>% 
  pull(sport) %>% 
  unique() %>% 
  sort()

sports_summer
##  [1] "Aeronautics"           "Alpinism"              "Archery"              
##  [4] "Art Competitions"      "Athletics"             "Badminton"            
##  [7] "Baseball"              "Basketball"            "Basque Pelota"        
## [10] "Beach Volleyball"      "Boxing"                "Canoeing"             
## [13] "Cricket"               "Croquet"               "Cycling"              
## [16] "Diving"                "Equestrianism"         "Fencing"              
## [19] "Figure Skating"        "Football"              "Golf"                 
## [22] "Gymnastics"            "Handball"              "Hockey"               
## [25] "Ice Hockey"            "Jeu De Paume"          "Judo"                 
## [28] "Lacrosse"              "Modern Pentathlon"     "Motorboating"         
## [31] "Polo"                  "Racquets"              "Rhythmic Gymnastics"  
## [34] "Roque"                 "Rowing"                "Rugby"                
## [37] "Rugby Sevens"          "Sailing"               "Shooting"             
## [40] "Softball"              "Swimming"              "Synchronized Swimming"
## [43] "Table Tennis"          "Taekwondo"             "Tennis"               
## [46] "Trampolining"          "Triathlon"             "Tug-Of-War"           
## [49] "Volleyball"            "Water Polo"            "Weightlifting"        
## [52] "Wrestling"

Sports that have been part of both the summer and winter Olympics

base::intersect(sports_summer, sports_winter)
## [1] "Alpinism"       "Figure Skating" "Ice Hockey"

7.5.2 Times on Schedule

Times a sport has been on the Olympic schedule, incl. first and last year

sport_stats <- d %>% 
  group_by(sport) %>% 
  summarise(
    first = min(year),
    last = max(year),
    times = n_unique(year)
)

sport_stats

Sports that have been on the Olympic schedule only once

sport_stats %>% 
  filter(times == 1) %>% 
  select(-times, -first) %>% 
  rename(year = last) %>% 
  arrange(desc(year))

Number of unique sports

sport_count <- d %>% 
  select(year, season, sport) %>% 
  distinct() %>% 
  group_by(year, season) %>% 
  count()

ggplot(sport_count, aes(x = year, y = n, color = season)) +
  geom_line() +
  labs(title = "Unique sports at the Olympic games") +
  scale_y_continuous(name = "Sports") +
  scale_x_continuous(name = "Year") +
  scale_fill_discrete(name = "Season")

Number of unique events

event_count <- d %>% 
  select(year, season, event) %>% 
  distinct() %>% 
  group_by(year, season) %>% 
  count()

ggplot(event_count, aes(x = year, y = n, color = season)) +
  geom_line() +
  labs(title = "Unique events at the Olympic games") +
  scale_y_continuous(name = "Events") +
  scale_x_continuous(name = "Year") +
  scale_fill_discrete(name = "Season")

8 Chord Diagram

I decided to create a chord diagram of the crossover between sports by multipotentialites.

8.1 Data Preparation

8.1.1 Pivot Data

Pivot wider (1 row per athlete)

multipotentialite_pivot <- multipotentialite %>% 
  arrange(year) %>% 
  select(id, name, sport) %>% 
  distinct() %>% 
  group_by(id, name) %>% 
  mutate(nr = paste0("sport", row_number())) %>% 
  ungroup() %>% 
  pivot_wider(names_from = nr, values_from = sport)

multipotentialite_pivot

8.1.2 Expand Grid

Expand grid for non-directional/symmetrical chord (A to B == B to A). I decided to keep the chord non-directional, because in some cases (medals won during the same event) it wasn’t clear which medal came first.

bidirectional <- multipotentialite_pivot %>%
  rowwise() %>%
  mutate(grid = list(expand_grid(
      source = c(sport1, sport2, sport3),
      target = c(sport1, sport2, sport3)) %>%
    filter(source != target))) %>%
  select(-starts_with("sport")) %>%
  unnest(grid)

bidirectional

8.1.3 Unique Sports

sports <- medalists %>% 
  filter(id %in% multipotentialite_id) %>% 
  pull(sport) %>% 
  sort() %>% 
  unique()

sports
##  [1] "Art Competitions"          "Athletics"                
##  [3] "Beach Volleyball"          "Biathlon"                 
##  [5] "Bobsleigh"                 "Boxing"                   
##  [7] "Cross Country Skiing"      "Cycling"                  
##  [9] "Diving"                    "Equestrianism"            
## [11] "Fencing"                   "Football"                 
## [13] "Gymnastics"                "Handball"                 
## [15] "Hockey"                    "Luge"                     
## [17] "Modern Pentathlon"         "Nordic Combined"          
## [19] "Polo"                      "Rowing"                   
## [21] "Rugby"                     "Sailing"                  
## [23] "Shooting"                  "Short Track Speed Skating"
## [25] "Skeleton"                  "Ski Jumping"              
## [27] "Speed Skating"             "Swimming"                 
## [29] "Tennis"                    "Tug-Of-War"               
## [31] "Volleyball"                "Water Polo"               
## [33] "Weightlifting"             "Wrestling"
nsports <- length(sports)
nsports
## [1] 34

8.1.4 Create Matrix

Create matrix with all options

m <- left_join(
  expand_grid(source = sports, target = sports), 
  count(bidirectional, source, target),
  by = c("source", "target")) %>%
  mutate(n = replace_na(n, 0)) %>%
  pivot_wider(names_from = target, values_from = n) %>%
  column_to_rownames(var = "source") %>% 
  as.matrix(
    nrow = length(sports), 
    ncol = length(sports))

8.1.5 Set Colors

Color only sports with an frequency greater than 5

# find sports with frequency > 5
colorsport <- bidirectional %>% 
  count(source) %>% 
  filter(n > 5) %>%
  pull(source)

# create color vector with base color
colors <- rep("#e3ded3", times = nsports) 

# add names to color vector
names(colors) <- sports 

# specify manual palette
manual_pal <-
  pal_length(
    c(
      "#da3238",
      "#d43f85",
      "#5d2e91",
      "#3656a4",
      "#107fb8",
      "#3a9e8d",
      "#77bb52"
    ),
    length(colorsport)
  )

# replace base color with color from palette for selected sports
colors[which(sports %in% colorsport)] <- manual_pal

# show colors
colorspace::swatchplot(colors)

8.2 Draw Chord

In a first attempt I created the chord diagram with the circlize package in R.

chordDiagram(
  m,
  symmetric = TRUE,
  grid.col = colors,
  order = colnames(m)
)

This result wasn’t exactly what I had in mind (apologies for not fixing the ugly labels).

The chord diagram is non-directional due to the symmetric nature of the data. Therefore, coloring according to only one of the two sports feels inappropriate. An alternative is to create gradient chords based on the colors of both the source and target sport —instead of only using the source color.

I read a great blog post by Nadieh Bremer on how to achieve this with SVG in D3. Unfortunately, I haven’t figured out a way to do this with R yet. Therefore, I decided to continue the visualization in an Observable notebook using SVG and D3.js.

Link: Observable notebook

8.3 Write Arrays

To use the data in Observable, I formatted the data as javascript arrays (in plain text) for copy-pasting into an Observable notebook.

# main matrix
arr <- "["
for(i in 1:nsports){
  a <- paste0("[", paste(as.character(m[i, ]), collapse = ','), "]" )
  b <- ifelse(i == length(sports), "]", ",")
  arr <- paste0(arr, a, b)
}

# links
write_file(
  arr,
  here("code", "2021_31", "arrays", "links.txt"))
## [[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0],[0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0],[0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],[0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,1,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0],[0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,3,0,0,0,1],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0],[1,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0],[0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],[1,0,0,0,0,0,0,0,5,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23,0,0],[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,3,2],[0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3,0,0,0,1],[0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0]]
# sports
write_file(
  paste0('["', paste(sports, collapse='","'), '"]'),
  here("code", "2021_31", "arrays", "nodes.txt"))
## ["Art Competitions","Athletics","Beach Volleyball","Biathlon","Bobsleigh","Boxing","Cross Country Skiing","Cycling","Diving","Equestrianism","Fencing","Football","Gymnastics","Handball","Hockey","Luge","Modern Pentathlon","Nordic Combined","Polo","Rowing","Rugby","Sailing","Shooting","Short Track Speed Skating","Skeleton","Ski Jumping","Speed Skating","Swimming","Tennis","Tug-Of-War","Volleyball","Water Polo","Weightlifting","Wrestling"]
# colors
write_file(
  paste0('["', paste(colors, collapse='","'), '"]'),
  here("code", "2021_31", "arrays", "colors.txt"))
## ["#e3ded3","#da3238","#e3ded3","#e3ded3","#d63962","#e3ded3","#c93d86","#e3ded3","#88348c","#e3ded3","#553594","#e3ded3","#404b9e","#e3ded3","#e3ded3","#e3ded3","#e3ded3","#2b61a9","#e3ded3","#e3ded3","#e3ded3","#e3ded3","#1677b4","#e3ded3","#e3ded3","#e3ded3","#e3ded3","#1f8aa8","#e3ded3","#369b90","#e3ded3","#55ab72","#77bb52","#e3ded3"]

8.4 Observable Plot

Session Info

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/nl_NL.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rvest_1.0.0      skimr_2.1.3      lubridate_1.7.10 janitor_2.1.0   
##  [5] here_1.0.1       glue_1.4.2       circlize_0.4.13  magrittr_2.0.1  
##  [9] forcats_0.5.1    stringr_1.4.0    dplyr_1.0.7      purrr_0.3.4     
## [13] readr_1.4.0      tidyr_1.1.3      tibble_3.1.3     ggplot2_3.3.5   
## [17] tidyverse_1.3.1 
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.2          sass_0.4.0          jsonlite_1.7.2     
##  [4] modelr_0.1.8        bslib_0.2.5.1       assertthat_0.2.1   
##  [7] highr_0.9           selectr_0.4-2       cellranger_1.1.0   
## [10] yaml_2.2.1          gdtools_0.2.3       Rttf2pt1_1.3.8     
## [13] pillar_1.6.1        backports_1.2.1     extrafontdb_1.0    
## [16] digest_0.6.27       snakecase_0.11.0    colorspace_2.0-2   
## [19] htmltools_0.5.1.1   hrbrthemes_0.8.0    pkgconfig_2.0.3    
## [22] broom_0.7.8         haven_2.4.1         scales_1.1.1       
## [25] farver_2.1.0        generics_0.1.0      ellipsis_0.3.2     
## [28] pacman_0.5.1        withr_2.4.2         repr_1.1.3         
## [31] cli_3.0.1           crayon_1.4.1        readxl_1.3.1       
## [34] evaluate_0.14       fs_1.5.0            fansi_0.5.0        
## [37] xml2_1.3.2          tools_4.0.5         hms_1.1.0          
## [40] GlobalOptions_0.1.2 lifecycle_1.0.0     ggchicklet_0.5.2   
## [43] munsell_0.5.0       reprex_2.0.0        formattable_0.2.1  
## [46] compiler_4.0.5      jquerylib_0.1.4     systemfonts_1.0.2  
## [49] rlang_0.4.11        grid_4.0.5          rstudioapi_0.13    
## [52] htmlwidgets_1.5.3   labeling_0.4.2      base64enc_0.1-3    
## [55] rmarkdown_2.9       gtable_0.3.0        DBI_1.1.1          
## [58] curl_4.3.2          R6_2.5.0            knitr_1.33         
## [61] extrafont_0.17      utf8_1.2.2          rprojroot_2.0.2    
## [64] shape_1.4.6         stringi_1.7.2       Rcpp_1.0.7         
## [67] vctrs_0.3.8         png_0.1-7           dbplyr_2.1.1       
## [70] tidyselect_1.1.1    xfun_0.24